* Session setup: start from an empty session, disable output paging, and
* fix the interpreter version so the script behaves the same on newer releases.
clear all
set more off
* NOTE(review): recent Stata releases manage memory automatically, so this
* line is probably a no-op kept for older installations - confirm before removing.
set mem 10000000
set matsize 10000
version 13

******************************************************************
*** Build File to Process Raw Census Concordance (2001 to 2011) **
******************************************************************

** Set file paths
* The global path_code must already be defined when this script starts.
* paths.do presumably defines the global conc used below - verify in paths.do.
do "$path_code/paths.do"

********************************************************************************
********************************************************************************

** 2001-2011 Concordance

* Import concordances to stata
* Reads the 35 raw CSV files Rdir_2001_MDDS_01.csv ... Rdir_2001_MDDS_35.csv
* from the concordance folder, stacks them into census_code_matches.dta,
* tidies the name fields, and gives the code variables harmonised names.
{
clear
cd "$conc"
forvalues s = 1/35 {
  * Two-digit, zero-padded file number (1 becomes 01)
  local st2 = string(`s', "%02.0f")
  insheet using "Rdir_2001_MDDS_`st2'.csv", comma double clear
  * From the second file onward the newly read file is the master and the
  * rows accumulated so far are appended beneath it
  if (`s' > 1) {
    append using census_code_matches
  }
  save census_code_matches, replace
}

use "$conc/census_code_matches.dta", clear
* Strip leading, trailing and repeated internal blanks from every name field
foreach nm of varlist *name* {
  replace `nm' = trim(itrim(`nm'))
}
compress

* The 2001 and 2011 state codes are expected to agree on every row
assert stc2001==mddsstc

* Harmonised names: no suffix = 2001 code, suffix 11 = 2011 code.
* ta_code (from subdt2001) is almost always == ta_code1 (VD), but
* sometimes == ta_code2 (PCA).
rename (stc2001 mddsstc dtc2001 mddsdtc subdt2001 mddssub_dt plcn2001 mddsplcn nameofstated mddsname) ///
       (st_code st_code11 dt_code dt_code11 ta_code ta_code11 vi_code vi_code11 name_2001 name_2011)

* Keep only rows with a nonzero district code in at least one of the two years
keep if dt_code!=0 | dt_code11!=0
save "$conc/census_code_matches.dta", replace
}

* Clean and reformat
* Input : $conc/census_code_matches.dta (written by the import step)
* Output: $conc/census_code_matches_names.dta with
*           block_2001 / block_2011         cleaned block names
*           bk_code_conc01 / bk_code_conc11 block codes (renamed ta_code / ta_code11)
*           village_conc01 / village_conc11 upper-cased, blank-trimmed names
*           conc_id                         row identifier
* The 2001 and 2011 sides go through exactly the same steps, so they share
* one loop; only the variable suffix and the SIRPUR block code differ.
{
use "$conc/census_code_matches.dta", clear

foreach yr in 2001 2011 {
  * 2001 code variables carry no suffix; the 2011 ones end in 11
  if (`yr' == 2001) {
    local sfx ""
    local tag "01"
    local sirpur_code 11
  }
  else {
    local sfx "11"
    local tag "11"
    local sirpur_code 4315
  }
  local b block_`yr'

  * The block name sits on the row whose village code is 0 and whose block
  * code is nonzero; spread it to every row of the same state-district-block
  gen temp_block_`yr' = upper(trim(itrim(name_`yr'))) if vi_code`sfx'==0 & ta_code`sfx'!=0
  egen `b' = mode(temp_block_`yr'), by(st_code`sfx' dt_code`sfx' ta_code`sfx')

  * Punctuation clean-up. The order of these substitutions matters and
  * matches the original nested subinstr calls, innermost first.
  replace `b' = subinstr(`b', "*", "", .)
  replace `b' = subinstr(`b', " -", " ", .)
  replace `b' = subinstr(`b', "- ", " ", .)
  replace `b' = subinstr(`b', "( ", "(", .)
  replace `b' = subinstr(`b', " )", ")", .)
  replace `b' = subinstr(`b', "-I", " I", .)
  replace `b' = subinstr(`b', "-1", " I", .)

  * Remove administrative markers, then collapse the blanks left behind
  foreach junk in "(T)" "(S.T)" "(M)" "C.D.BLOCK" "SUB-DIV." "SUB-DIVISION" "(P)" "CIRCLE" "CD BLOCK" {
    replace `b' = subinstr(`b', "`junk'", "", .)
  }
  replace `b' = trim(itrim(`b'))

  * Put the (T) marker back for the SIRPUR block with this specific block code.
  * NOTE(review): the condition is not restricted by state or district -
  * confirm no other SIRPUR block shares this code.
  replace `b' = "SIRPUR (T)" if `b'=="SIRPUR" & ta_code`sfx'==`sirpur_code'

  rename ta_code`sfx' bk_code_conc`tag'
  rename name_`yr' village_conc`tag'
  replace village_conc`tag' = upper(trim(itrim(village_conc`tag')))
}

drop temp_block_2001 temp_block_2011
* Drop rows with no village code in either year, and rows where the village
* code is 0 in both years
drop if vi_code==. & vi_code11==.
drop if vi_code==0 & vi_code11==0
duplicates drop
compress

* Store the identifier as long: the default float type cannot represent
* every integer above 16,777,216, and this variable is created after compress.
* NOTE(review): conc_id follows the current row order; add an explicit sort
* first if identifiers must be reproducible across rebuilds.
gen long conc_id = _n
save "$conc/census_code_matches_names.dta", replace
}

********************************************************************************
********************************************************************************







